################################################################################
# Created By: Pietryka
# Creation Date:  2016-09-09
# Purpose: Cleans the CCES 2010-2014 three-wave panel data
# Questions: mpietryka@fsu.edu
# Notes: Before running this code, download the data:
#        Schaffner, Brian; Ansolabehere, Stephen, 2015,
#        "2010-2014 Cooperative Congressional Election Study Panel Survey",
#        Harvard Dataverse, V5
#        https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/TOE8I1&version=5.0
################################################################################


# PREAMBLE =============================================



# LOAD PACKAGES -----------------
library(tidyr)
library(dplyr)
library(haven)



# LOAD DATA -----------------

# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/TOE8I1&version=5.0

# Read the three-wave panel file. The file is not shipped with this script
# (see the Dataverse link above), so fail early with an actionable message
# instead of the reader's generic error when it has not been downloaded yet.
cces_path <- "Data/CCES14_Panel_3waves_VV.dta"
if (!file.exists(cces_path)) {
  stop(
    "Cannot find '", cces_path, "' relative to the working directory. ",
    "Download the 2010-2014 CCES panel data from the Harvard Dataverse ",
    "(doi:10.7910/DVN/TOE8I1) and save it at that path.",
    call. = FALSE
  )
}
cces <- read_dta(cces_path)



# CLEAN  =============================================

# RENAME VARS  -----------------
# Give the party-contact (CC*_425a) and church-attendance (pew_churatd_*)
# items names of the form <stem>_<yy>, one per panel wave.
cces <- rename(
  cces,
  # party contact
  contacted_10 = CC10_425a,
  contacted_12 = CC12_425a,
  contacted_14 = CC14_425a,
  # church attendance
  churchfreq_10 = pew_churatd_10,
  churchfreq_12 = pew_churatd_12,
  churchfreq_14 = pew_churatd_14
)


# RECODE FAMILY INCOME  -----------------

# Build the recoded income variables faminc2_<yy>. Each nested ifelse() is
# read from the outside in; the outer tests take precedence over inner ones.
#   2010: code 15 -> NA; codes above 5 shift down by two; codes 4 and 5 -> 3;
#         code 3 -> 2; anything else is kept as is.
#   2012 and 2014: code 97 -> NA; codes strictly between 12 and 97 -> 12;
#         anything else is kept as is.
cces <- mutate(
  cces,
  faminc2_10 = ifelse(
    faminc_10 == 15,
    NA,
    ifelse(
      faminc_10 > 5,
      faminc_10 - 2,
      ifelse(
        faminc_10 == 4 | faminc_10 == 5,
        3,
        ifelse(faminc_10 == 3, 2, faminc_10)
      )
    )
  ),
  faminc2_12 = ifelse(
    faminc_12 == 97,
    NA,
    ifelse(faminc_12 > 12 & faminc_12 < 97, 12, faminc_12)
  ),
  faminc2_14 = ifelse(
    faminc_14 == 97,
    NA,
    ifelse(faminc_14 > 12 & faminc_14 < 97, 12, faminc_14)
  )
)

# TURNOUT VARIABLE  -----------------
# Collapse a validated-vote code into a turnout dummy (double):
# 1 = voted by any method, 0 = confirmed non-voter or unmatched record,
# NA for any other code.
recode_turnout <- function(x) {
  recode(
    as.numeric(x),
    `1` = 1,    # Absentee voter
    `2` = 1,    # Early voter
    `3` = 1,    # Mail voter
    `4` = 1,    # In person or unknown method
    `6` = 0,    # Confirmed Non-voter
    `9` = 0,    # Unmatched
    .default = NA_real_
  )
}

# Create turnout_10 / turnout_14 as new columns, leaving VV_general_* intact.
# Plain mutate() is used rather than mutate_at(vars(new = old), funs(...)):
# funs() is deprecated, and creating new columns through named vars() depends
# on version-specific behaviour of the scoped verbs.
cces <- cces %>%
  mutate(
    turnout_10 = recode_turnout(VV_general_10),
    turnout_14 = recode_turnout(VV_general_14)
  ) %>%
  # Force 2014 turnout to 0 whenever validated_reg_14 is 3, 4, or 9.
  # NOTE(review): when validated_reg_14 is NA the condition is NA and
  # if_else() returns NA, discarding whatever turnout_14 held - confirm that
  # this is intended.
  mutate(
    turnout_14 = if_else(
      validated_reg_14 == 3 |
        validated_reg_14 == 4 |
        validated_reg_14 == 9,
      0,
      turnout_14
    )
  )

# PARTICIPATION VARIABLES  -----------------
# Turn a 1/2-coded survey item into an integer dummy.
#   x: vector coercible with as.numeric().
#   Returns an integer vector the same length as x: code 1 -> 1L,
#   code 2 -> 0L, every other value (including NA) -> NA.
recode_binary <- function(x) {
  # match() yields position 1 for code 1, position 2 for code 2 and NA for
  # anything else; indexing the lookup vector by that position does the recode.
  c(1L, 0L)[match(as.numeric(x), c(1, 2))]
}

# Convert the four CC*_417a participation items of the 2010 and 2014 waves
# into 0/1 dummies with recode_binary().
cces <- mutate(
  cces,
  # 2010
  attendmeeting_10 = recode_binary(CC10_417a_1),
  polsign_10       = recode_binary(CC10_417a_2),
  workcampaign_10  = recode_binary(CC10_417a_3),
  donatemoney_10   = recode_binary(CC10_417a_4),
  # 2014
  attendmeeting_14 = recode_binary(CC14_417a_1),
  polsign_14       = recode_binary(CC14_417a_2),
  workcampaign_14  = recode_binary(CC14_417a_3),
  donatemoney_14   = recode_binary(CC14_417a_4)
)


# SUBSET DATA  -----------------
# RELEVANT VARIABLES
# Names of the columns to keep: every stem below crossed with the 2010 and
# 2014 wave suffixes ("<stem>_10" for all stems first, then "<stem>_14").
keep_vars <- local({
  stems <- c(
    "marstat",          # marital status (categorical; missing: 8, 9)
    "employ",           # employment (categorical; missing: 98, 99)
    "educ",             # education (ordinal, missing: 8, 9)
    "faminc2",          # family income, recoded above
    "turnout",          # validated voting (dummy coded above)
    "child18",          # child under 18? (1 = 1, 2 = 0; missing: 8, 9)
    "countyfips",       # pre-election county ID (no recode, missing: 'none')
    "countyfips_post",  # post-election county ID (no recode, missing: 'none')
    "ownhome",          # own home? (1 = 1, 2/3 = 0; missing: 8, 9)
    # PARTICIPATION
    "attendmeeting",
    "polsign",
    "workcampaign",
    "donatemoney",
    # EXTRA VARS TO IMPROVE IMPUTATION
    "contacted",        # party contact (1 Yes, 2 No, missing 8, 9)
    "churchfreq",       # church attendance (ordinal, missing 8, 9)
    "newsint",          # interest (ordinal, missing 8, 9)
    "birthyr",          # birth year (missing: 9998, 9999)
    "gender"            # gender (1 = 0, 2 = 1; missing: 8, 9)
  )
  waves <- c(10, 14)
  # Stems vary fastest, so the full 2010 set precedes the full 2014 set.
  paste(
    rep(stems, times = length(waves)),
    rep(waves, each = length(stems)),
    sep = "_"
  )
})

# Build the long analysis file: one row per respondent per wave, one column
# per variable stem.
cces_subset <- cces %>%
  # Flag used for Bob's exclusion criteria: 1 when the respondent reports a
  # child under 18 in 2014 (child18_14 == 1) and child18num_14 is above one
  # or missing; 0 otherwise, including when the condition itself is NA.
  mutate(
    morethanone = if_else(
      child18_14 == 1 & (is.na(child18num_14) | child18num_14 > 1),
      1,
      0,
      missing = 0
    )
  ) %>%
  # Keep the ID, the flag, 2012 income, and the 2010/2014 variables of interest
  select(caseid = caseid_14, morethanone, faminc2_12, one_of(keep_vars)) %>%
  mutate(
    # Two cases with an unknown county: "None" becomes missing
    countyfips_post_14 = ifelse(
      countyfips_post_14 == "None", NA, countyfips_post_14
    ),
    # Moved between the 2010 pre-election and 2014 post-election waves?
    # 0 = same county, 1 = different county, NA when either ID is missing
    countymove = if_else(
      countyfips_10 == countyfips_post_14,
      0L,
      1L,
      missing = NA_integer_
    )
  ) %>%
  # The county IDs are only needed to build countymove
  select(-contains("fips")) %>%
  # Long format: one row per variable per person per wave
  gather(variable, value, -caseid, -countymove, -morethanone) %>%
  separate(variable, into = c("label", "year"), sep = "_") %>%
  # Back to one row per person per wave, one column per variable stem
  spread(label, value, convert = TRUE) %>%
  # Two-digit wave suffix -> four-digit year
  mutate(year = recode(year, "10" = 2010, "12" = 2012, "14" = 2014)) %>%
  select(caseid, year, morethanone, everything())


# RECODE VARIABLES  -----------------
# Diagnostic only: frequency table (including NA counts) for every column
# except the first. lapply() keeps each column's own type and always returns
# a list with one table per column, whereas apply() would first coerce the
# data frame to a matrix and may simplify the result.
cces_subset %>%
  select(-1) %>%
  lapply(table, useNA = "always")

# Derive the analysis variables. Arguments of a single mutate() are evaluated
# in order; any code not listed in a recode() call becomes NA.
cces_subset <- mutate(
  cces_subset,
  # Own home: 1 -> 1, 2 or 3 -> 0
  homeowner = recode(
    ownhome,
    `1` = 1L, `2` = 0L, `3` = 0L,
    .default = NA_integer_
  ),
  # Child under 18: 1 -> 1, 2 -> 0
  parent = recode(
    child18,
    `1` = 1L, `2` = 0L,
    .default = NA_integer_
  ),
  # Survey year minus birth year
  age = year - birthyr,
  # Gender: 1 -> 0, 2 -> 1
  female = recode(
    gender,
    `1` = 0L, `2` = 1L,
    .default = NA_integer_
  ),
  # Party contact: 1 -> 1, 2 -> 0 (overwrites the raw item)
  contacted = recode(
    contacted,
    `1` = 1L, `2` = 0L,
    .default = NA_integer_
  ),
  # Church attendance, reversed so that higher values mean more frequent
  churchfreq = recode(
    churchfreq,
    `1` = 5L, # More than once a week
    `2` = 4L, # Once a week
    `3` = 3L, # Once or twice a month
    `4` = 2L, # A few times a year
    `5` = 1L, # Seldom
    `6` = 0L, # Never
    .default = NA_integer_
  ),
  # News interest, reversed so that higher values mean more interest
  newsint = recode(
    newsint,
    `1` = 3L, # Most of the time
    `2` = 2L, # Some of the time
    `3` = 1L, # Only now and then
    `4` = 0L, # Hardly at all
    .default = NA_integer_
  )
)

# Save ====================================================

# Write the cleaned long-format panel to disk under the object name
# "cces_subset".
save(list = "cces_subset", file = "Data/cces_subset.RData")

